** This Do-File replicates Figures 4 and 5 from P. Dutronc, A. Tondini "Large Means-Tested Pensions with Informal Labor Markets" ** 

* Session setup: switch off output paging, move to the data folder, load the data.
set more off

cd "" /* Fill in the directory that holds PALMS v3.2 (only years 2002--2015) */

use "south_africa_palms.dta", clear


**************************************
** Select Population for Estimation **
**************************************

* Drop the years in which the eligibility threshold is lowered from 65 to 60.
drop if inlist(year, 2008, 2009)

* Restrict to Black and Coloured individuals (pop_group 1 or 2), see text for explanation.
keep if inlist(pop_group, 1, 2)

* Remove observations whose gender is missing (stored as . or coded 9).
drop if sex==9 | sex==.

* Marital-status dummy: 1 when marital_status equals 1, 0 in every other case.
gen married = marital_status==1

* Keep ages 55 through 64, i.e. a window of five years on each side of age 60.
drop if age<55 | age>=65

* Dummy for the period after the reforms (survey years later than 2008).
gen post = year>2008


** Rescale the weights so that they can be pooled across survey years **

* Within-year average of the weight, computed on the estimation sample kept so far.
bysort year: egen mean_weight = mean(weight)

* Dividing by that average gives every wave the same mean weight.
replace weight = weight / mean_weight

** Informal-employment indicator (tot_inf) **

* Start from missing; only the cases classified below receive a 0/1 value.
gen tot_inf = .

* Employees (status 1): informal when there is no written contract, formal otherwise.
replace tot_inf = (no_written_contract==1) if status==1 & inlist(no_written_contract, 0, 1)

* Self-employed (status 2): informal when the business is not registered, formal otherwise.
replace tot_inf = (not_registered==1) if status==2 & inlist(not_registered, 0, 1)

* Not employed (status 3 or 4): never informal.
replace tot_inf = 0 if inlist(status, 3, 4)

* Observations whose informality status could not be determined leave the sample.
drop if tot_inf==.

* Formal employment: employed and not classified as informal.
gen f_emp = emp==1 & tot_inf==0


** Wages and Hours **


*** Express earnings in 2010 Rand ***

* CPI index values listed in survey-year order, 2002 first and 2015 last (2010 = 100).
local cpi_by_year 69.3 73.3 72.8 74.3 76.7 81.4 89.6 96.1 100.0 105.0 111.1 117.5 124.7 130.3

gen cpi_index = .
local yr = 2002
foreach value of local cpi_by_year {
	replace cpi_index = `value' if year==`yr'
	local ++yr
}

* Deflate the monthly salary to the 2010 price level.
replace monthly_sal=monthly_sal * (100/cpi_index)


* Not employed (status 3 or 4): hours and monthly salary are both set to zero.
foreach v of varlist hrs_wrk monthly_sal {
	replace `v' = 0 if inlist(status, 3, 4)
}

* Hours of work in formal employment: own hours if formally employed, zero otherwise.
gen hrs_wrk_f = cond(f_emp==1, hrs_wrk, 0)

* Hourly wage. NOTE(review): the factor 4.5 presumably converts weekly hours into
* monthly hours - confirm against the definition of hrs_wrk.
gen hourly_wage=monthly_sal/(hrs_wrk*4.5)

** Quartiles of Hourly Wages among tot_inf==1, sex==1, ages 55-59, Before and After **
* First pass (post==0) is the period before the reforms, second pass (post==1) the period after.
* Each percentile is displayed after multiplication by 43*4.5.
forvalues after = 0/1 {

	summarize hourly_wage if tot_inf==1 & sex==1 & post==`after' & age>=55 & age<60 [aw=weight], detail

	foreach pct in 1 25 50 75 {
		display r(p`pct')*43*4.5
	}
}

* From here on both wage variables are in logs.
foreach v of varlist monthly_sal hourly_wage {
	replace `v' = ln(`v')
}


* Employed observations without a usable log hourly wage are excluded: salary or hours
* are missing, or the hourly wage is 0 and therefore has no logarithm.
drop if emp==1 & hourly_wage==.



**************************************
************ Regressions *************
**************************************

* Threshold dummy: 1 for individuals aged 60 or more.
gen OAP_1 = age>=60

* Running variable: age re-centred so that the discontinuity sits at zero.
gen x = age-60

* Functional form of the running variable used in every regression below.
local f_form x
* The control set is switched off; while the next line stays commented out the
* controls macro is empty and adds nothing to the regressions.
*local controls i.Province i.married i.education /* set of control variables */


* Informality (tot_inf) by wage bin.
* Outer loop: post==0 is the period before the reforms (results saved as window_b_1..4),
*             post==1 the period after (results saved as window_a_1..4).
* Inner loop: wage bin 1 to 4, delimited by the weighted p25/p50/p75 of hourly_wage
*             among tot_inf==1, sex==1, ages 55-59 in the same period.
* NOTE: parmest is not an official Stata command; it has to be installed beforehand
*       (distributed through SSC).
forvalues after = 0/1 {

	local tag = cond(`after'==0, "b", "a")

	forvalues bin = 1/4 {

		summarize hourly_wage if tot_inf==1 & sex==1 & post==`after' & age>=55 & age<60 [aw=weight], detail

		* Keep the cut-offs in temporary scalars (full double precision).
		tempname p25 p50 p75
		scalar `p25' = r(p25)
		scalar `p50' = r(p50)
		scalar `p75' = r(p75)

		preserve

		* Set the outcome to zero for employed individuals whose wage lies outside the current bin.
		if `bin'==1 {
			replace tot_inf=0 if emp==1 & hourly_wage>=`p25'
		}
		else if `bin'==2 {
			replace tot_inf=0 if emp==1 & (hourly_wage<`p25' | hourly_wage>`p50')
		}
		else if `bin'==3 {
			replace tot_inf=0 if emp==1 & (hourly_wage<`p50' | hourly_wage>`p75')
		}
		else {
			replace tot_inf=0 if emp==1 & hourly_wage<`p75'
		}

		* Linear specification in x with a separate slope above the threshold; sex==1 only.
		regress tot_inf i.pop_group i.year c.(`f_form') c.(`f_form')#1.OAP_1 1.OAP_1 `controls' [w=weight] if sex==1 & post==`after', vce(robust)

		* Store estimates and 95% confidence limits for the plotting step.
		parmest, label list(parm estimate min95 max95) level(95) saving(window_`tag'_`bin', replace)

		restore
	}
}

* Build and draw Figure 4 from the parmest files written by the tot_inf regressions
* (window_b_1..4 = before the reforms, window_a_1..4 = after).
* The estimation data are preserved here and restored once both panels are drawn.
preserve

* Stack the coefficient on 1.OAP_1 from the four "before" files; wage records the bin (1-4).
use window_b_1.dta, clear
keep if parm=="1.OAP_1"
gen wage=1

forvalues j = 2/4 {
	append using window_b_`j'.dta
	keep if parm=="1.OAP_1"
	replace wage=`j' if wage==.
}
gen outcome="before"

* Same for the four "after" files; appended rows arrive with an empty outcome.
forvalues j = 1/4 {
	append using window_a_`j'.dta
	keep if parm=="1.OAP_1"
	replace wage=`j' if wage==.
}
replace outcome="after" if outcome==""

keep if wage>=1 & wage<=4


* Axis labels for the "before" wage bins.
label define wage_b 1 "R104- R597" 2 "R597- R1149" 3 "R1149- R2255" 4 "R2255+"
label values wage wage_b



***********************
**Figure 4 - Panel a **
***********************

* name() keeps this panel in memory under its own name. Without it every graph is
* drawn as the default "Graph" and is overwritten by the next twoway call, so only
* the last panel of the do-file would survive a full run.
twoway (scatter estimate wage if outcome=="before", msize(large) mcolor(black)) ///
	(line estimate wage if outcome=="before", sort lcolor(black)) ///
	(rcap min95 max95 wage if outcome=="before", lcolor(black)) ///
	, xlabel(1 2 3 4 , valuelabel) legend(off) graphregion( color(white) ) plotregion(fcolor(white) ) ylabel(-0.07 (0.01) 0.02) yline(0, lcolor(black)) xtitle("") ///
	name(fig4_panel_a, replace)



* Axis labels for the "after" wage bins.
label define wage_a 1 "R101- R934" 2 "R934- R1786" 3 "R1786- R3523" 4 "R3523+"
label values wage wage_a

***********************
**Figure 4 - Panel b **
***********************

twoway (scatter estimate wage if outcome=="after", msize(large) mcolor(black)) ///
	(line estimate wage if outcome=="after", sort lcolor(black) lpattern(dash)) ///
	(rcap min95 max95 wage if outcome=="after", lcolor(black) lpattern(dash)) ///
	, xlabel(1 2 3 4 , valuelabel) legend(off) graphregion( color(white) ) plotregion(fcolor(white) ) ylabel(-0.07 (0.01) 0.02) yline(0, lcolor(black)) xtitle("") ///
	name(fig4_panel_b, replace)

restore

* Remove the intermediate result files.
forvalues j = 1/4 {
	erase window_a_`j'.dta
	erase window_b_`j'.dta
}

********************************************************************************


* Formal employment (f_emp) by wage bin.
* Outer loop: post==0 is the period before the reforms (results saved as window_b_1..4),
*             post==1 the period after (results saved as window_a_1..4).
* Inner loop: wage bin 1 to 4, delimited by the weighted p25/p50/p75 of hourly_wage
*             among tot_inf==1, sex==1, ages 55-59 in the same period.
* NOTE: parmest is not an official Stata command; it has to be installed beforehand
*       (distributed through SSC).
forvalues after = 0/1 {

	local tag = cond(`after'==0, "b", "a")

	forvalues bin = 1/4 {

		summarize hourly_wage if tot_inf==1 & sex==1 & post==`after' & age>=55 & age<60 [aw=weight], detail

		* Keep the cut-offs in temporary scalars (full double precision).
		tempname p25 p50 p75
		scalar `p25' = r(p25)
		scalar `p50' = r(p50)
		scalar `p75' = r(p75)

		preserve

		* Set the outcome to zero for employed individuals whose wage lies outside the current bin.
		if `bin'==1 {
			replace f_emp=0 if emp==1 & hourly_wage>=`p25'
		}
		else if `bin'==2 {
			replace f_emp=0 if emp==1 & (hourly_wage<`p25' | hourly_wage>`p50')
		}
		else if `bin'==3 {
			replace f_emp=0 if emp==1 & (hourly_wage<`p50' | hourly_wage>`p75')
		}
		else {
			replace f_emp=0 if emp==1 & hourly_wage<`p75'
		}

		* Linear specification in x with a separate slope above the threshold; sex==1 only.
		regress f_emp i.pop_group i.year c.(`f_form') c.(`f_form')#1.OAP_1 1.OAP_1 `controls' [w=weight] if sex==1 & post==`after', vce(robust)

		* Store estimates and 95% confidence limits for the plotting step.
		parmest, label list(parm estimate min95 max95) level(95) saving(window_`tag'_`bin', replace)

		restore
	}
}

* Build and draw Figure 5 from the parmest files written by the f_emp regressions
* (window_b_1..4 = before the reforms, window_a_1..4 = after).
* The estimation data are preserved here and restored once both panels are drawn.
preserve

* Stack the coefficient on 1.OAP_1 from the four "before" files; wage records the bin (1-4).
use window_b_1.dta, clear
keep if parm=="1.OAP_1"
gen wage=1

forvalues j = 2/4 {
	append using window_b_`j'.dta
	keep if parm=="1.OAP_1"
	replace wage=`j' if wage==.
}
gen outcome="before"

* Same for the four "after" files; appended rows arrive with an empty outcome.
forvalues j = 1/4 {
	append using window_a_`j'.dta
	keep if parm=="1.OAP_1"
	replace wage=`j' if wage==.
}
replace outcome="after" if outcome==""

keep if wage>=1 & wage<=4

* Axis labels for the "before" wage bins.
label define wage_b 1 "R104- R597" 2 "R597- R1149" 3 "R1149- R2255" 4 "R2255+"
label values wage wage_b


***********************
**Figure 5 - Panel a **
***********************

* name() keeps this panel in memory under its own name. Without it every graph is
* drawn as the default "Graph" and is overwritten by the next twoway call, so only
* the last panel of the do-file would survive a full run.
twoway (scatter estimate wage if outcome=="before", msize(large) mcolor(black)) ///
	(line estimate wage if outcome=="before", sort lcolor(black)) ///
	(rcap min95 max95 wage if outcome=="before", lcolor(black)) ///
	, xlabel(1 2 3 4 , valuelabel) legend(off) graphregion( color(white) ) plotregion(fcolor(white) ) ylabel(-0.07 (0.01) 0.02) yline(0, lcolor(black)) xtitle("") ///
	name(fig5_panel_a, replace)

* Axis labels for the "after" wage bins.
label define wage_a 1 "R101- R934" 2 "R934- R1786" 3 "R1786- R3523" 4 "R3523+"
label values wage wage_a


***********************
**Figure 5 - Panel b **
***********************

twoway (scatter estimate wage if outcome=="after", msize(large) mcolor(black)) ///
	(line estimate wage if outcome=="after", sort lcolor(black) lpattern(dash)) ///
	(rcap min95 max95 wage if outcome=="after", lcolor(black) lpattern(dash)) ///
	, xlabel(1 2 3 4 , valuelabel) legend(off) graphregion( color(white) ) plotregion(fcolor(white) ) ylabel(-0.07 (0.01) 0.02) yline(0, lcolor(black)) xtitle("") ///
	name(fig5_panel_b, replace)

restore

* Remove the intermediate result files.
forvalues j = 1/4 {
	erase window_a_`j'.dta
	erase window_b_`j'.dta
}